********************************************************************************
global basepath ".../Data"
global results ".../Results"
*
cap n clear all
cap n clear matrix
cap n set max_memory 400g
cap n set maxvar 120000
cap n set segmentsize 1g
cap n set max_preservemem 100g
cap n set more off
cap n set checksum off
cap n set type double
cap n set processors 6
cap n version 16.1
********************************************************************************


********************************************************************************
** Table 8: Real Effects of the U-SACCO Program on Household Poverty
********************************************************************************

//  Prepare POST variable at the district level  
use no_switches_by_sector, clear  // contains post variable 
ren date_m mdate 
gen date = dofm(mdate)
gen year = year(date) 
tab year // 2008-2016 

// calculate the total share of switchers over entire period divided by initial total borrowers 

cap drop Total_Borrowers_MFI Total_Borrowers_SACCO
ren Total_Borrowers Total_Borrowers_SACCO
bys district: gen totborr2011=sum(Total_Borrowers_SACCO) if year == 2011 // generates baseline results 

collapse (mean) totborr2011 (sum)   SACCO_to_Bank SACCOorMFI_to_Bank  MFI_to_Bank, by(district) 
gen switcher_sacco=SACCO_to_Bank/totborr2011

su switcher_sacco, d  // ranges from 0-6.57%, very nice 
gen switcher_sacco_high25=0
replace switcher_sacco_high25=1 if switcher_sacco>r(p25)
label var switcher_sacco_high25 "High switching" // above 25th percentile of the distribution of switches 
 
ren district district_name 
save post_by_district_ready, replace 

///////////////////// ************************************************* ///////////////////// 
///////////////////// ************************************************* ///////////////////// 
///////////////////// now use the microdata from all EICV surveys     ///////////////////// 
///////////////////// ************************************************* ///////////////////// 
///////////////////// ************************************************* ///////////////////// 
** the list of district code is in the file district_codes
** this is a non-confidential file containing district names and codes from http://www.statoids.com/yrw.html 

global eicv3 "D:[...]\2010-2011\EICV3_Stata\EICV3_Stata" 
global eicv5 "D:[...]2016-2017\rwa-nisr-EICV5-data_STATA" 

// process the 2016-2017 EICV -- POST PROGRAM SURVEY 
use "$eicv5/EICV5_Poverty_file", clear 
ren  weight HH_WT 
ren  pov_jan  poverty 
ren epov_jan  epoverty 
keep hhid clust ae province district ur HH_WT pop_wt epoverty poverty

su district
gen temp = round(district)
cap drop district
ren temp district 

tostring district, replace 
gen temp = "0"
gen temp1=substr(district, 1,1)
gen temp2=substr(district, 2,1)
egen temp3 = concat(temp1 temp temp2) 
drop district*
ren temp3 district
drop temp* 
destring district, replace 

merge m:1 district using district_codes // non confidential file 
cap drop _m 

des 
ren ae hhsize 
replace ur=0 if ur==2 
gen rural=1-ur 
label var rural "Rural" 
cap drop ur 
label var district "District" 
gen survey = "EICV5-2016/2017"
save eicv5_ready", replace 

// process the 2010/2011 EICV -- PRE PROGRAM SURVEY 

use "$eicv4/EICV3_Povertyfile_Jan2014", clear 
keep hhid clust ae id1 district ur HH_WT pop_wt epoverty poverty
ren id1 province 
tab district
destring district, replace 
merge m:1 district using district_codes  // non confidential file 
cap drop _m 

des 
ren ae hhsize 
replace ur=0 if ur==2 
gen rural=1-ur 
label var rural "Rural" 
cap drop ur 
label var district "District" 
gen survey = "EICV3-2010/2011"
save eicv3_ready, replace 

// Append the data from the post-program survey 
append using eicv5_ready 

tab survey //  check, good 

tab district //  check, good 

// Merge with information on bank presence 
ren district district_code
ren district_name district 
merge m:1 district using data_for_maps_by_district.dta // check: we should have perfect match 
drop _merge

// Generate POST-program dummy and key RHS variables 
gen post=(survey=="EICV5-2016/2017") 

// Bank presence variables 
gen bank_presence=LowBankPresence_BankBranch
gen low_presence = 0
replace low_presence = 1 if bank_presence<.085 
// recall: low presence is below the 75th percentile of the distribution 
// definition *must* be consistent with FINSCOPE analysis  
gen high_presence = low_presence
recode high_presence (0=1) (1=0)
gen low_presence_post=low_presence*post
gen high_presence_post=high_presence*post

label var low_presence  "High bank presence" 
label var high_presence  "High bank presence" 
label var high_presence_post "High bank presence*Post"
label var low_presence_post "Low bank presence*post"

// Examine poverty variables
su poverty epoverty
label var poverty "Household in poverty" 
label var epoverty "Household in extreme poverty" 
*tabstat poverty epoverty, by(survey)  

foreach x of varlist poverty epoverty {
replace `x'=`x'/100
} 

// Merge w/ switching information to open up the POST Coefficient by Share of Switchers 

ren district district_name
merge m:1 district_name using post_by_district_ready // perfect 

gen post_switcher_sacco_high25 = switcher_sacco_high25 *post 
label var post_switcher_sacco_high25 "Post x High switching" 

gen switcher_mfi = MFI_to_Bank / 1000  // per 1,000 borrowers 
global controls rural hhsize 

 
//////////////////////////////////////////////// *********** //////////////////////////////////////////////// 
//////////////////////////////////////////////// Regressions //////////////////////////////////////////////// 
////////////////////////////////////////////////   TABLE 8  //////////////////////////////////////////////// 
//////////////////////////////////////////////// *********** //////////////////////////////////////////////// 

// All regressions have district FE and household controls 
// Regressions are weighted by household survey sampling weight HH_WT 
// SE's are clustered at the district level 

// Cols 1-3 + 4-6 of Table 8 
local append "replace"
foreach y  of varlist  poverty epoverty { 

// cols 1, 4 
	qui reghdfe `y' post $controls [aw=HH_WT] , absorb(district_id) cluster(district_id)
	sum `y' if e(sample)
	local mean = r(mean)
	outreg2 using "$stata\realeffects_eicv.xls", `append' nocons ctitle(`y') bdec(4) tdec(4) adec(4) addstat(R2adj, `e(r2_a)', Mean y, `mean')  ///
		se excel label addtext(District FE, Y, Household controls, Y)  keep(post) 
	local append "append"

// cols 2, 5 
	qui reghdfe `y' low_presence_post high_presence_post  $controls [aw=HH_WT], absorb(district_id) cluster(district_id)
	qui sum `y' if e(sample)
	local mean = r(mean)
	qui test low_presence_post == high_presence_post
	local test = r(p)
	outreg2 using "$stata\realeffects_eicv.xls", `append' nocons ctitle(`y') bdec(4) tdec(4) adec(4) addstat(R2adj, `e(r2_a)', Mean y, `mean', Equality test (p-value), `test')  ///
		se excel label addtext(District FE, Y, Household controls, Y)   keep( low_presence_post high_presence_post)
		
// cols 3, 6 
	reghdfe `y' c.low_presence_post#switcher_sacco_high25 high_presence_post  rural#switcher_sacco_high25 c.hhsize#switcher_sacco_high25 switcher_mfi [aw=HH_WT]  , absorb(district_id) cluster(district_id)
	qui sum `y' if e(sample)
	local mean = r(mean)
	qui test _b[0b.switcher_sacco_high25#c.low_presence_post] = _b[1.switcher_sacco_high25#c.low_presence_post] 
	local test = r(p)
	outreg2 using "$stata\realeffects_eicv.xls", `append' nocons ctitle(`y') bdec(4) tdec(4) adec(4) addstat(R2adj, `e(r2_a)', Mean y, `mean', Equality test (p-value), `test')  ///
		se excel label addtext(District FE, Y, Household controls, Y)  keep(c.low_presence_post#switcher_sacco_high25 high_presence_post ) 
			local append "append"		
}



********************************************************************************
** Table C19: Information Requests at Credit Bureau and New Bank Loans
********************************************************************************

// Information on files 
*CR_Hits_2018_19_BankMunicipalityTime.dta contains data on CRB queries and successful hits and # of loans 
*These data cover the period from January 2018 to December 2019 
*bank_BS_2017.dta // contains balance sheet data as of 2017Q1 

use CR_Hits_2018_19_BankMunicipalityTime.dta,  clear 

tab date_m
gen ddate=dofm(date_m) 
format ddate %td 
gen q=quarter(ddate) 
gen year = year(ddate) 
gen qdate = yq(year,q) 
format qdate %tq 

collapse (sum) new_credit new_noloans no_inquiries no_data_hits , by( bank_id qdate sector_id) 

// Prepare dependent variables 
foreach x of varlist new_credit new_noloans no_inquiries no_data_hits {
gen l`x'=log(1+`x')
} 

// Label variables 
label var lnew_noloans "No. new loans (log)" 
label var lno_inquiries "No. information requests (log)" 
label var lno_data_hits "NO. information requests with successfully identified individuals (log)" 

// Merge with SACCO balance sheet information 
merge m:1 bank_id using bank_BS_2017 // confidential file with balance sheet data 

cap erase "$stata/CRB_queries_lending.xls"
cap erase "$stata/CRB_queries_lending.xls"

// Dependent variable: Number of new loans (log)

// Cols 1-3: Add demand controls (municipality*time FE)  bank_id#sector_id 
 
xi: reghdfe lnew_noloans lno_inquiries  , noabsorb vce(cl  bank_id#sector_id) 
qui outreg2 using "$stata/CRB_queries_lending.xls",  nocons bdec(4) tdec(4) se excel label append  addstat(R2adj, `e(r2_a)')

xi: reghdfe lnew_noloans lno_inquiries  , a(sector_id#qdate) vce(cl  bank_id#sector_id) 
qui outreg2 using "$stata/CRB_queries_lending.xls",  nocons bdec(4) tdec(4) se excel label append  addstat(R2adj, `e(r2_a)') 
 
xi: reghdfe lnew_noloans lno_inquiries  , a(bank_id sector_id#qdate) vce(cl  bank_id#sector_id) 
 outreg2 using "$stata/CRB_queries_lending.xls",  nocons bdec(4) tdec(4) se excel label append  addstat(R2adj, `e(r2_a)') 

// Cols 7-9 

xi: reghdfe lnew_noloans lno_data_hits  , noabsorb vce(cl  bank_id#sector_id) 
qui outreg2 using "$stata/CRB_queries_lending.xls",  nocons bdec(4) tdec(4) se excel label append  addstat(R2adj, `e(r2_a)') 

xi: reghdfe lnew_noloans lno_data_hits  , a(sector_id#qdate) vce(cl  bank_id#sector_id) 
qui outreg2 using "$stata/CRB_queries_lending.xls",  nocons bdec(4) tdec(4) se excel label append  addstat(R2adj, `e(r2_a)') 
 
xi: reghdfe lnew_noloans lno_data_hits  , a(bank_id sector_id#qdate) vce(cl  bank_id#sector_id) 
 outreg2 using "$stata/CRB_queries_lending.xls",  nocons bdec(4) tdec(4) se excel label append  addstat(R2adj, `e(r2_a)')

 
 
********************************************************************************
** Table C20: Real Effects of the U-SACCO Program on Firm and Employment Creation
********************************************************************************

global census2014 "D:[...]\Establishment Census\Rwanda Establishment Census 2014" 

use "$census2014/rec-2014-data-v2", clear 

ren *, lower

// Basic data procesing 
keep if q8==1 // keep private sector enterprises 

cou // 74.179 firms 
*keep q1_2 q5_1 q5_2 q22h fi start_y q4_2  q4_3  q7 fi /// 
 *Male_worker Female_worker Total_workers q21  q20 q6_1  /// 
 *q24a q24b q24c q24d q24e q24f q24g q24h q24z
ren id2 district 
ren id3 municipality 

su district
gen temp = round(district)
cap drop district
ren temp district 

tostring district, replace 
gen temp = "0"
gen temp1=substr(district, 1,1)
gen temp2=substr(district, 2,1)
egen temp3 = concat(temp1 temp temp2) 
drop district*
ren temp3 district
drop temp* 
destring district, replace 

ren q5a month 
ren q5b year 
su month year 
drop if month==.
drop if year ==.
*drop if year == "Not stated"
su year 
drop if year == 9999
gen mdate=ym(year, month)
format mdate %tm 

gen date = dofm(mdate)
gen q=quarter(mdate) 
gen qdate=yq(year, q)
format qdate %tq 

keep if year>=2006  
tab year // 2006-2014 

ren q4b manager_sex
replace manager_sex=. if manager_sex==9 
replace manager_sex=2-manager_sex
tab manager_sex // 1 is male, 0 female 
ren q4c  manager_age 
replace manager_age =. if manager_age <15 
label var manager_sex "1: manager==male"
label var manager_age "manager's age" 

replace fi=2-fi 
tab fi  // 1: formal, according to their definition 
ren fi formal_fi 
tab q27h
ren q27h formal_tax 
label var formal_fi "Formal"
label var formal_tax "Formal-registered w/ tax authority"

label var q24 "Current employed capital" 
tab q24 // Frw 500,000 
replace q24 =. if q24 ==9 

gen capital_above500k=0  
replace capital_above500k=1 if q24>=2 
replace capital_above500k=. if q24==.
label var capital_above500k "Capital: above RWF500k" 

gen capital_above15mn=0  
replace capital_above15mn=1 if q24>=3 
replace capital_above15mn=. if q24==.
label var capital_above15mn "Capital: above RWF15mn"

ren q21c1 Total_workers 
ren q21c2 Male_worker
ren q21c3 Female_worker

su Total_workers, d // notice that 95% of all firms are microfirms 

// identify sole proprietorships (one worker) and microenterprises (max 5 workers) 

gen microfirm=0
replace microfirm=1 if Total_workers<=6  
replace microfirm=. if Total_workers==.
tab microfirm  //   97% of all firms are microfirms 
label var microfirm "Microfirm (Size<=6)" 

gen entrepreneur=0
replace entrepreneur=1 if Total_workers==1 
replace entrepreneur=. if Total_workers==.
tab entrepreneur  //  65% of all firms are sole-prioprietorships, i.e. size==1 
label var microfirm "Sole-proprietorship (Size=1)"

 
* industry classification 
ren isic1q6b q6_1 
tab q6_1 
ren q6_1 industry
replace industry=. if industry==99
tab industry 
gen industry_manufacturing=0 
replace industry_manufacturing=1 if industry ==3
replace industry_manufacturing=. if industry ==.

gen industry_trade=0 
replace industry_trade=1 if industry ==7
replace industry_trade=. if industry ==.

tabstat industry_trade industry_manufacturing, by(year) 

gen firmid=1 
replace formal_tax=2-formal_tax
tab formal_tax // good 
gen firmid_formaltax = firmid*formal_tax // 1: formal 
gen firmid_formalfi = firmid*formal_fi // 1: formal 
gen firmid_informaltax=1-firmid_formaltax 
gen firmid_informalfi=1-firmid_formalfi 

tabstat firmid firmid_formaltax firmid_informaltax , by(district) 
drop  firmid_formaltax firmid_informaltax  

* count the number of microenterprises and sole-proprietorships 
gen firmid_micro  = firmid*microfirm  
gen firmid_sole  = firmid*entrepreneur 
su firmid_micro firmid_sole
 
// collapse the data at the district/month level 
gcollapse (sum) firmid firmid_micro firmid_sole (sum) Total_workers, /// 
by(month year district mdate q qdate municipality ) 

 // merge with district names + information on bank presence 
merge m:1 district using district_codes // check: we should have perfect match 
cap drop _m  

ren district district_code 
ren district_name district 

decode municipality, gen(temp) 
cap drop municipality
ren temp municipality 
cap drop  q27ia- q30

// more cleaning 
drop if year<2008 
drop if month==0 

// check if some municipalities are assigned to two different districts and make corrections 
bys municipality: egen temp =nvals(district) 
tab temp 
*ed municipality district if temp ==2 // identify problematic municipalities
tab municipality  if temp>=2 // list of municipalities that have multiple district assignments 
// need to make sure they are assigned to the district from the CRB file 

tab district if municipality=="rugendabari" // burera and muhanga
// burera should be assigned to municipality=="rugengabari" 
*ed if municipality=="rugendabari"
tab mdate if municipality=="rugendabari"  
replace municipality="rugengabari"  if district=="burera" & municipality=="rugendabari" 

// continue manual corrections 
cap drop temp 
bys municipality: egen temp =nvals(district) 
tab temp 
distinct municipality  if temp>=2  // 29 sectors with multiple districts 
tab municipality  if temp>=2
tab district if municipality=="kanjongo"
distinct municipality // 380 
tab municipality 
replace municipality="kanjongo" if  municipality=="kanjogo"
replace municipality="mageregere" if  municipality=="mageragere"
replace municipality="muhazi" if  municipality=="muhazii"
replace municipality="shyrongi" if  municipality=="shyorongi"
replace municipality="ruganda" if  municipality=="buganda"
 
 
tab municipality
distinct municipality // 380 
 
cap drop temp 
bys municipality: egen temp =nvals(district) 
tab temp 
distinct municipality  if temp==2 // 23 sectors have two districts 
distinct municipality  if temp==3 // 5 sectors have two districts 
distinct municipality  if temp==4 // 1 sectors have two districts 

tab municipality  if temp==4  // remera 
tab municipality  if temp==3   
tab municipality  if temp>=2 

// drop clearly erroneous entries 
drop  if temp>=2 
distinct municipality // 	351 municipalities 

// set up the file as a balanced panel  as in the CRB analysis 
encode municipality, gen(sectorid)
*egen panelid=group(district_code sectorid) 
xtset sectorid mdate , monthly 
tsfill, full 
cap drop temp 

// make sure that municipality, district and district code variables are filled up 
bys sectorid: egen temp = mode(municipality)
replace municipality=temp if municipality==""
cap drop temp 

bys sectorid: egen temp = mode(district)
replace district=temp if district==""
cap drop temp 

bys sectorid: egen temp = mean(district_code)
replace district_code=temp if district_code==.
cap drop temp 
drop q qdate year month 

// prepare all dependent variables 

foreach x of varlist firmid firmid_micro firmid_sole Total_workers {
replace `x'=0 if `x'==.
} 

foreach x of varlist firmid firmid_micro firmid_sole  {
gen l`x'=log(1+`x') 
} 

gen totalemp = log(1+Total_workers) 

// merge with staggered SACCO deployment at the municipality/quarter level 

distinct municipality // 351 sectors in original data, square panel 
xtset sectorid mdate, monthly // 2008m1-2014m12 
ren municipality sector 
merge m:1 mdate sector using switches_by_sector_ready.dta // confidential file based on the CRB 

tab sector if _m==1 // unmatched sectors from the census file, we can drop  
drop if _m==1 
cap drop _m 
 
*tab post // defined in the CRB derived file 
*tab mdate // perfectly square panel for 337 municipalities 

 
// merge with bank presence  
replace sector="rurenge" if sector=="rusenge" // fix typo 
merge m:1 sector using data_for_maps_by_sector.dta // confieential file based on data from NBR 
drop if _m==2
cap drop _m 

 
cap ren LowBankPresence_InvBankBranch LowBankPresence_BankBranch  
gen bank_presence=LowBankPresence_BankBranch
gen bank_presence_post=bank_presence*post

cap drop low_presence
gen low_presence = 0
cap bys district: gen unique=_n 
su bank_presence, d 
display r(p75) 
replace low_presence = 1 if bank_presence<.085 // low bank presence is < 75th percentile  

gen high_presence = low_presence
recode high_presence (0=1) (1=0)
gen low_presence_post=low_presence*post
gen high_presence_post=high_presence*post
egen district_id=group(district)
xtset sector_id mdate, monthly 


// label variables 
label var mdate "Time trend"  
label var lfirmid_micro  "No. of new microenterprises (log)" 
label var lfirmid_sole  "No. of new sole entrepreneurships (log)" 
label var totalemp "NO. of new employees (log)" 


/////////////////////////////////// *********** /////////////////////////////////// 
/////////////////////////////////// REGRESSIONS /////////////////////////////////// 
///////////////////////////////////  TABLE C20 /////////////////////////////////// 
/////////////////////////////////// *********** /////////////////////////////////// 
 
// SEs clustered at municipality level (sector_id)  
// Cols 1-2 for microenterprises; cols 3-4 for sole proprietorships, and cols 5-6 for total new employees  

 local append "replace"
foreach y of varlist  lfirmid_micro lfirmid_sole totalemp {

	reghdfe `y' post   mdate , absorb(sector_id) cluster(sector_id)
	sum `y' if e(sample)
	local mean = r(mean)
	outreg2 using "$output\censusfirms_re.xls", `append' nocons ctitle(`y') bdec(4) tdec(4) adec(4) addstat(R2adj, `e(r2_a)', Mean y, `mean')  ///
	se excel label addtext(Municipality FE, Y, Time trend, Y)  
	local append "append"	

	reghdfe `y' c.post#low_presence   mdate c.mdate#low_presence, absorb(sector_id) cluster(sector_id)
	sum `y' if e(sample)
	local mean = r(mean)
	qui test _b[0b.low_presence#c.post]== _b[1.low_presence#c.post]
	local test = r(p)
	outreg2 using "$output\censusfirms_re.xls", `append' nocons ctitle(`y') bdec(4) tdec(4) adec(4) addstat(R2adj, `e(r2_a)', Mean y, `mean', p-value test Bank Presence, `test')  ///
		se excel label addtext(Municipality FE, Y, Time trend, Y)  	 
		
		
}


**************************************************************************************
** Table A2: Effect of U-SACCO Program on Access to Credit - Household Survey Evidence
**************************************************************************************

clear all
use "$basepath/Finscope_2012_2016.dta"
*
cd "$results"
global control "female young married poor no_education"
*
local append "replace"
foreach y of varlist loan loan_formal loan_informal loan_bank loan_SACCO {
	qui reghdfe `y' post $control [pw=weight], absorb(district_id) cluster(district_id)
	sum `y' if e(sample) & year==2012
	local mean = r(mean)
	outreg2 using "$results/Table_A2.xls", `append' nocons ctitle(`y') bdec(3) tdec(3) adec(3) addstat(R2adj, `e(r2_a)', Mean y, `mean')  ///
		se excel label addtext(District FE, Yes)  
	local append "append"
	qui reghdfe `y' low_presence_post high_presence_post $control [pw=weight], absorb(district_id) cluster(district_id)
	sum `y' if e(sample) & year==2012
	local mean = r(mean)
	test low_presence_post == high_presence_post
	local test = r(p)
	outreg2 using "$results/Table_A2.xls", `append' nocons ctitle(`y') bdec(3) tdec(3) adec(3) addstat(R2adj, `e(r2_a)', Mean y, `mean', Equality test (p-value), `test')  ///
		se excel label addtext(District FE, Yes)  

}

**************************************************************************************
** Table A1: Descriptive Statistics on Financial Inclusion - Household Survey Evidence
**************************************************************************************

file open des using "$results/Table_A1.xls", write replace
		file write des " & Mean	& St.Dev. & & Mean	& St.Dev. & & Mean	& St.Dev. " _n
		foreach x of varlist loan loan_formal loan_informal loan_bank loan_SACCO low_presence post $control {
			su `x' [aw=weight] if year==2012, det
			local media12=round(`r(mean)',0.001)
				local sd12=round(`r(sd)',0.001)
			su `x' [aw=weight] if year==2016, det
			local media16=round(`r(mean)',0.001)
				local sd16=round(`r(sd)',0.001)
			su `x' [aw=weight], det
			local media=round(`r(mean)',0.001)
				local sd=round(`r(sd)',0.001)

				file write des "`x'	& `media12'	& `sd12' & & `media16'	& `sd16' & & `media'	& `sd' &	" _n
	
		}
		file close des
